# Data visualization - Plotly Exercises - ex.7 - Adam Trentowski, 162602
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.datasets import load_wine, load_iris
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Scatter plot of the iris data from px.data.iris(): sepal width against
# sepal length, with the marker colour taken from the petal_length column.
df = px.data.iris()
fig = px.scatter(
    data_frame=df,
    x="sepal_width",
    y="sepal_length",
    color="petal_length",
)
fig.show()
# Scatter plot of the scikit-learn wine dataset: alcohol against colour
# intensity, coloured by hue.
# Load the dataset once and reuse the result instead of calling load_wine()
# separately for the data, the column names and the target.
wine = load_wine()
df = pd.DataFrame(data=wine.data, columns=wine.feature_names)
# Keep the class labels in the frame; the grouped bar chart later in the
# script reads df['class'].
df['class'] = wine.target
fig = px.scatter(df, x="alcohol", y="color_intensity", color='hue')
fig.show()
# Bar chart of the 'pop' column per year for the Canada rows of the
# gapminder sample data.
gapminder = px.data.gapminder()
data_canada = gapminder.query("country == 'Canada'")
fig = px.bar(data_frame=data_canada, x="year", y="pop")
fig.show()
# Bar chart of the mean alcohol content per wine class.
# Relies on `df` still being the wine DataFrame (with its 'class' column)
# built earlier in the script.
mean_alcohol_by_class = df.groupby('class')['alcohol'].mean().reset_index()
fig = px.bar(
    mean_alcohol_by_class,
    x='class',
    y='alcohol',
    labels={'class': 'Class of Wine', 'alcohol': 'Average Alcohol (%)'},
    title='Average alcohol content in each wine class',
    color='alcohol',
    text='alcohol',
)
# The group means are unrounded floats; show them with two decimals so the
# labels on the bars stay readable.
fig.update_traces(texttemplate='%{text:.2f}')
fig.show()
# Pie chart of the tips sample data: one slice per 'day', sized by the
# summed 'tip' values.
df = px.data.tips()
fig = px.pie(
    data_frame=df,
    names="day",
    values="tip",
)
fig.show()
# Pie chart of how many samples each wine class contains.
# Load the dataset once and reuse the result instead of calling load_wine()
# three times.
wine = load_wine()
df = pd.DataFrame(data=wine.data, columns=wine.feature_names)
df['class'] = wine.target
# value_counts() gives the class labels as the index and the counts as values.
class_counts = df['class'].value_counts()
fig = px.pie(
    values=class_counts,
    names=class_counts.index,
    title='Classes in dataset',
    color_discrete_sequence=px.colors.qualitative.Set1,
)
fig.show()
# Two overlaid histograms of 500 normally distributed samples each.
x0 = np.random.randn(500)
# Second sample: adding 1 shifts the mean of the Gaussian distribution.
x1 = 1 + np.random.randn(500)
fig = go.Figure(data=[go.Histogram(x=x0), go.Histogram(x=x1)])
# Make the bars semi-transparent and draw them on top of each other so both
# distributions remain visible.
fig.update_traces(opacity=0.75)
fig.update_layout(barmode='overlay')
fig.show()
# Overlaid histograms of sepal width, one trace per iris class.
# Load the dataset once and reuse the result instead of calling load_iris()
# three times.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['class'] = iris.target
fig = go.Figure()
for class_id in df['class'].unique():
    # Sepal widths of the rows belonging to the current class only.
    class_data = df.loc[df['class'] == class_id, 'sepal width (cm)']
    fig.add_trace(go.Histogram(x=class_data, name=f'Class {class_id}'))
# Overlay the histograms and reduce opacity so every class stays visible.
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)
fig.show()
# Box plot of the tips sample data: distribution of 'total_bill' for each
# value of 'time'.
df = px.data.tips()
fig = px.box(
    data_frame=df,
    x="time",
    y="total_bill",
)
fig.show()
# Box plot of sepal length for each iris class.
# Load the dataset once and reuse the result instead of calling load_iris()
# three times.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['class'] = iris.target
fig = px.box(df, x="class", y="sepal length (cm)")
fig.show()
# KNN on the two-moons toy dataset: train/test points drawn together with a
# contour of the classifier's predicted probability over a grid.
mesh_size = .02
margin = 0.25

# Load and split data. The labels are converted to strings, so they compare
# equal to the '0' / '1' labels used in trace_specs below.
X, y = make_moons(noise=0.3, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(
    X, y.astype(str), test_size=0.25, random_state=0)

# Create a mesh grid on which we will run our model; it covers the data
# range widened by `margin` on every side.
x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin
xrange = np.arange(x_min, x_max, mesh_size)
yrange = np.arange(y_min, y_max, mesh_size)
xx, yy = np.meshgrid(xrange, yrange)

# Create classifier, run predictions on grid.
# Fit on the training split only: fitting on the full X, y would let the
# points plotted as "Test" shape the surface they are shown against.
clf = KNeighborsClassifier(15, weights='uniform')
clf.fit(X_train, y_train)
# Second predict_proba column, i.e. the probability of the second class
# ('1' for the string labels used here).
Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
Z = Z.reshape(xx.shape)

# One scatter trace per (split, label) pair:
# [points, labels, label to select, split name, marker symbol].
trace_specs = [
    [X_train, y_train, '0', 'Train', 'square'],
    [X_train, y_train, '1', 'Train', 'circle'],
    [X_test, y_test, '0', 'Test', 'square-dot'],
    [X_test, y_test, '1', 'Test', 'circle-dot']
]

# The comprehension uses its own variable names instead of re-using X and y,
# so it cannot be confused with the full dataset defined above.
fig = go.Figure(data=[
    go.Scatter(
        x=points[labels == label, 0], y=points[labels == label, 1],
        name=f'{split} Split, Label {label}',
        mode='markers', marker_symbol=marker
    )
    for points, labels, label, split, marker in trace_specs
])
fig.update_traces(
    marker_size=12, marker_line_width=1.5,
    marker_color="lightyellow"
)

# Semi-transparent contour of the predicted probability over the grid.
fig.add_trace(
    go.Contour(
        x=xrange,
        y=yrange,
        z=Z,
        showscale=False,
        colorscale='RdBu',
        opacity=0.4,
        name='Score',
        hoverinfo='skip'
    )
)
fig.show()
# KNN classification of the iris dataset using its first two feature
# columns, plotting train/test points together with the predicted classes
# over a grid.
# Load the dataset once and reuse the result instead of calling load_iris()
# three times.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['class'] = iris.target

X = df.iloc[:, :2].values  # first two columns only, for a 2D plot
y = df['class'].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)

# Mesh grid covering the data range widened by `margin` on every side.
mesh_size = .02
margin = 0.25
x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin
xrange = np.arange(x_min, x_max, mesh_size)
yrange = np.arange(y_min, y_max, mesh_size)
xx, yy = np.meshgrid(xrange, yrange)

# Fit on the training split and predict a class for every grid point.
clf = KNeighborsClassifier(n_neighbors=15, weights='uniform')
clf.fit(X_train, y_train)
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

fig = go.Figure()
# Per-class marker styling: filled symbols for training points, open symbols
# for test points, one colour per class.
symbols = ['circle', 'square', 'diamond']
symbols_open = ['circle-open', 'square-open', 'diamond-open']
colors = ['blue', 'red', 'green']
# Walk the three style lists in lockstep; the position i is the class label
# used to select the points.
for i, (symbol, symbol_open, color) in enumerate(
        zip(symbols, symbols_open, colors)):
    train_mask = y_train == i
    test_mask = y_test == i
    fig.add_trace(go.Scatter(x=X_train[train_mask, 0],
                             y=X_train[train_mask, 1],
                             mode='markers', marker_symbol=symbol,
                             marker_color=color, name=f'Train Class {i}'))
    fig.add_trace(go.Scatter(x=X_test[test_mask, 0],
                             y=X_test[test_mask, 1],
                             mode='markers', marker_symbol=symbol_open,
                             marker_line_color='black', marker_color=color,
                             name=f'Test Class {i}'))

# Semi-transparent contour of the predicted class over the grid.
fig.add_trace(go.Contour(x=xrange, y=yrange, z=Z, showscale=False,
                         line_width=0, colorscale='Viridis', opacity=0.5))
fig.update_layout(title='KNN Classification on Iris Dataset with 2 Features',
                  xaxis=dict(title='Sepal Length (cm)'),
                  yaxis=dict(title='Sepal Width (cm)'),
                  legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))
fig.show()